Chapter 6 Community composition

load("data/data.Rdata")

6.1 Taxonomy overview

6.1.1 Stacked barplot

# Strip the leading "p__" rank prefix from phylum names so they read cleanly
# in legends and tables. The pattern is anchored so that only the prefix is
# removed, never an internal occurrence.
genome_metadata <- genome_metadata %>%
  mutate(phylum = str_remove(phylum, "^p__"))

# Stacked barplot of per-sample relative abundances, filled by phylum and
# split into one panel per region.
# NOTE(review): `region` is assumed to be a column of sample_metadata.
genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome x sample
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # drop zero counts
  # grouping by phylum keeps the stacking order of taxa identical across bars
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
  # stacked bars with thin white borders between genomes
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) +
  scale_fill_manual(values = phylum_colors) +
  # `scales` is spelled out in full instead of relying on partial matching
  facet_grid(~region, scales = "free", space = "free") +
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(
      linewidth = 0.5, linetype = "solid", colour = "black"
    )
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

6.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample: normalise every sample
# column to sum to 1, reshape to long format, attach metadata and add up the
# genome-level values within each sample x phylum combination.
phylum_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # drop the grouping explicitly so the result is a plain tibble and
  # summarise() does not emit its regrouping message
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of relative abundance per phylum across
# samples, sorted from most to least abundant.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
tinytable_ew4beqx7syaefil1sul7
phylum mean sd
Fusobacteriota 0.3663617229 0.136021624
Bacteroidota 0.3034917323 0.111266820
Bacillota_A 0.1494367879 0.072293629
Pseudomonadota 0.0977735470 0.055151857
Bacillota 0.0331239049 0.040190706
Bacillota_C 0.0271325097 0.041839388
Campylobacterota 0.0089299322 0.016614731
Actinomycetota 0.0069779735 0.008750923
Deferribacterota 0.0039464909 0.005804386
Bacillota_B 0.0016437166 0.004243115
Spirochaetota 0.0006735096 0.001927730
Desulfobacterota 0.0005081724 0.001525258
# Phylum names ordered from highest to lowest mean relative abundance.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(phylum)

# Jitter plot with one row per phylum and one point per sample. The factor
# levels are reversed so that the most abundant phylum ends up at the top of
# the y axis.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(y = phylum, x = relabun, color = phylum, group = phylum)) +
  geom_jitter(alpha = 0.5) +
  # palette entries are looked up by phylum name, in the same order as the
  # factor levels
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  labs(x = "Relative abundance", y = "Phylum") +
  theme_minimal() +
  theme(legend.position = "none")

6.1.3 Phylum percentages by site

Daneborg dogs

tinytable_1y059bl4noawy6vcxl8z
Phylum mean sd
Fusobacteriota 39.86040326 9.50276145
Bacteroidota 33.46096983 10.28066834
Bacillota_A 11.71124356 5.59102631
Pseudomonadota 9.77011822 3.32682054
Bacillota 2.07262689 1.05420229
Bacillota_C 1.65309681 0.73666970
Actinomycetota 0.53422369 0.87082467
Deferribacterota 0.43183843 0.45976979
Campylobacterota 0.30217355 0.55610573
Desulfobacterota 0.10163448 0.20496300
Spirochaetota 0.05087026 0.08756671
Bacillota_B 0.05080102 0.10006683

Ittoqqortoormiit dogs

tinytable_87k35cynzsfuvasbwv5e
Phylum mean sd
Fusobacteriota 33.41194132 16.2729562
Bacteroidota 27.23737663 11.2373166
Bacillota_A 18.17611403 7.3137374
Pseudomonadota 9.78459118 7.1311314
Bacillota 4.55215410 5.3467364
Bacillota_C 3.77340513 5.7240774
Campylobacterota 1.48381289 2.1417871
Actinomycetota 0.86137102 0.8632284
Deferribacterota 0.35745975 0.6867294
Bacillota_B 0.27794229 0.5742640
Spirochaetota 0.08383167 0.2596527

6.2 Taxonomy boxplot

6.2.1 Family

# Relative abundance of each family in each sample: normalise every sample
# column to sum to 1, reshape to long format, attach metadata and add up the
# genome-level values within each sample x family combination.
family_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome x sample
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # drop the grouping explicitly so the result is a plain tibble and
  # summarise() does not emit its regrouping message
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of relative abundance per family across
# samples, sorted from most to least abundant.
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
tinytable_pyiw3fiu8dozz79rp9ra
family mean sd
f__Fusobacteriaceae 3.663617e-01 0.1360216238
f__Bacteroidaceae 2.973931e-01 0.1131925162
f__Lachnospiraceae 6.831901e-02 0.0505223956
f__Burkholderiaceae_A 4.806831e-02 0.0196480683
f__Succinivibrionaceae 3.552330e-02 0.0361163460
f__Ruminococcaceae 2.705037e-02 0.0196101453
f__Peptostreptococcaceae 2.621345e-02 0.0203006445
f__Acidaminococcaceae 1.546376e-02 0.0071868623
f__Clostridiaceae 1.539613e-02 0.0295281580
f__Enterobacteriaceae 1.395292e-02 0.0401029267
f__Selenomonadaceae 1.166875e-02 0.0421561660
f__Erysipelotrichaceae 9.178545e-03 0.0109172103
f__Anaeroplasmataceae 6.090648e-03 0.0074218634
f__Coriobacteriaceae 6.011618e-03 0.0080478036
f__Lactobacillaceae 5.951066e-03 0.0298173221
f__Helicobacteraceae 5.803308e-03 0.0113002456
f__Mucispirillaceae 3.946491e-03 0.0058043860
f__Oscillospiraceae 3.664318e-03 0.0053396853
f__Turicibacteraceae 3.496100e-03 0.0111830905
f__Campylobacteraceae 3.126624e-03 0.0093571271
f__Coprobacillaceae 2.972458e-03 0.0055273393
f__Muribaculaceae 2.753827e-03 0.0066359854
f__Streptococcaceae 2.290454e-03 0.0077197113
f__CAG-508 2.285805e-03 0.0020219741
f__Enterococcaceae 2.244881e-03 0.0153913547
f__Anaerotignaceae 2.028736e-03 0.0030524632
f__Butyricicoccaceae 1.844323e-03 0.0021125655
f__Peptococcaceae 1.643717e-03 0.0042431146
f__UBA932 1.304836e-03 0.0038121201
f__Tannerellaceae 1.119621e-03 0.0025439141
f__CAG-274 9.414083e-04 0.0015219121
f__Marinifilaceae 9.203578e-04 0.0029931117
f__Brachyspiraceae 6.735096e-04 0.0019277303
f__CAG-826 6.216468e-04 0.0012720412
f__Anaerovoracaceae 5.526548e-04 0.0011437465
f__Eggerthellaceae 5.224363e-04 0.0008857518
f__Desulfovibrionaceae 5.081724e-04 0.0015252576
f__Peptoniphilaceae 4.681118e-04 0.0016902695
f__Bifidobacteriaceae 4.439194e-04 0.0032700701
f__Cellulosilyticaceae 4.144138e-04 0.0017399622
f__Beijerinckiaceae 1.815455e-04 0.0013826090
f__Mycoplasmoidaceae 1.276422e-04 0.0006902477
f__JAAYXM01 1.252571e-04 0.0004581646
f__ 8.847277e-05 0.0003869487
f__UBA3375 8.372673e-05 0.0002904414
f__Catellicoccaceae 6.673736e-05 0.0005082566
f__Burkholderiaceae_C 4.747760e-05 0.0002888266
f__Acutalibacteraceae 4.433113e-05 0.0002541989
# Family names ordered from highest to lowest mean relative abundance.
# The statistic is now an actual mean, matching the column name and the way
# phylum_arrange is built. Every family has one row per sample in
# family_summary, so the resulting order is the same as ranking by the sum.
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(family)

# Per region
# Only the 20 most abundant families are plotted. head() is used instead of
# [1:20] so that no NA entries appear when fewer than 20 families exist.
top_families <- head(family_arrange, 20)

# family_summary carries neither phylum nor region, so both are joined back
# in before filtering to the plotted families and non-zero values.
family_plot_data <- family_summary %>%
  left_join(distinct(genome_metadata, family, phylum), by = join_by(family)) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  filter(family %in% top_families, relabun > 0) %>%
  # reversed levels put the most abundant family at the top of the y axis
  mutate(family = factor(family, levels = rev(top_families)))

family_plot_data %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  # keep only the palette entries whose phylum is actually plotted, selected
  # by name rather than by a hard-coded position in phylum_colors
  scale_color_manual(
    values = phylum_colors[names(phylum_colors) %in% family_plot_data$phylum]
  ) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ region) +
  theme_minimal() +
  labs(y = "Family", x = "Relative abundance", color = "Phylum")

6.2.2 Genus

# Relative abundance of each genus in each sample, keeping the phylum of each
# genus for colouring. Genomes without a genus assignment ("g__") are removed
# and the "g__" rank prefix is stripped from the remaining names.
genus_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome x sample
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # drop the grouping explicitly so the result is a plain tibble and
  # summarise() does not emit its regrouping message
  summarise(relabun = sum(count), .groups = "drop") %>%
  filter(genus != "g__") %>%
  mutate(genus = sub("^g__", "", genus))

# Mean and standard deviation of relative abundance per genus across samples,
# sorted from most to least abundant.
genus_summary_sort <- genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean))

genus_summary_sort %>%
  tt()
tinytable_2zpgeb53nx9um5wq6iz2
genus mean sd
Fusobacterium_A 1.717979e-01 9.816809e-02
Phocaeicola 1.633786e-01 7.071086e-02
Fusobacterium_B 1.584751e-01 7.096538e-02
Alloprevotella 4.510262e-02 5.111930e-02
Sutterella 4.199309e-02 1.870638e-02
Mediterranea 3.651435e-02 2.488149e-02
Anaerobiospirillum 3.467102e-02 3.597278e-02
Bacteroides 2.979760e-02 1.616196e-02
Faecalibacterium 2.122649e-02 1.654400e-02
Peptacetobacter 1.961270e-02 1.591090e-02
Blautia 1.780596e-02 1.930155e-02
Phascolarctobacterium_A 1.546376e-02 7.186862e-03
Prevotella 1.441140e-02 3.214202e-02
Escherichia 1.365996e-02 3.991853e-02
Megamonas 1.166875e-02 4.215617e-02
Faecalimonas 1.019823e-02 1.584840e-02
Sarcina 9.177124e-03 2.107934e-02
Blautia_A 8.640935e-03 7.338491e-03
Paraprevotella 7.600772e-03 1.567049e-02
Ruminococcus_B 7.140240e-03 1.169737e-02
CALUXS01 6.090648e-03 7.421863e-03
Collinsella 6.011618e-03 8.047804e-03
Eisenbergiella 6.000636e-03 5.665431e-03
Schaedlerella 5.689622e-03 7.173958e-03
Aphodousia 5.602216e-03 5.889228e-03
Peptostreptococcus 4.022476e-03 1.301728e-02
Allobaculum 3.729700e-03 3.752479e-03
Turicibacter 3.496100e-03 1.118309e-02
Cetobacterium_A 3.421893e-03 1.764385e-02
Ligilactobacillus 3.334587e-03 1.801788e-02
Campylobacter_D 3.126624e-03 9.357127e-03
Clostridium 3.026090e-03 6.730894e-03
Mucispirillum 2.978653e-03 5.517462e-03
Limisoma 2.753827e-03 6.635985e-03
Faecousia 2.637273e-03 4.060837e-03
Avimicrobium 2.503860e-03 2.344204e-03
Enterococcus_B 2.171357e-03 1.539372e-02
Fournierella 2.170025e-03 2.069538e-03
Helicobacter_A 2.168108e-03 7.080641e-03
Lactobacillus 2.107807e-03 1.031698e-02
Helicobacter_G 1.996833e-03 9.063015e-03
Ventrimonas 1.931520e-03 2.963800e-03
Lachnospira 1.875220e-03 4.241014e-03
Enterocloster 1.851708e-03 1.957803e-03
Butyricicoccus 1.844323e-03 2.112565e-03
Streptococcus 1.681189e-03 7.369580e-03
UMGS1590 1.643717e-03 4.243115e-03
Merdicola 1.589782e-03 1.114922e-03
JAHHTG01 1.420336e-03 7.838952e-03
Cryptobacteroides 1.304836e-03 3.812120e-03
Anaerotignum 1.252553e-03 1.964794e-03
Mediterraneibacter 1.232300e-03 6.915086e-03
Parabacteroides 1.119621e-03 2.543914e-03
Faecalibacillus 1.062936e-03 5.329853e-03
Holdemanella 9.950546e-04 2.092863e-03
CALVGN01 9.893358e-04 1.439612e-03
Clostridium_Q 9.645699e-04 1.827770e-03
Odoribacter 9.203578e-04 2.993112e-03
Clostridium_H 9.149621e-04 2.869388e-03
Clostridium_J 9.070087e-04 2.879388e-03
Helicobacter_C 8.779979e-04 3.338818e-03
Roseburia 8.764896e-04 3.399869e-03
Hungatella_A 8.718369e-04 1.940106e-03
Amedibacterium 8.709000e-04 4.384047e-03
Gallispira 8.122097e-04 1.377777e-03
Anaerobiospirillum_A 7.850619e-04 1.690167e-03
GCA-900066495 7.626101e-04 2.566438e-03
Copromonas 7.534693e-04 1.440772e-03
CAJMNU01 7.334341e-04 1.047209e-03
Negativibacillus 7.228149e-04 1.563832e-03
CAG-269 6.960231e-04 1.872399e-03
Faecalitalea 6.890936e-04 1.421625e-03
Brachyspira 6.735096e-04 1.927730e-03
Helicobacter_B 6.613746e-04 2.219386e-03
Fimicola 6.261776e-04 2.448396e-03
Onthovivens 6.216468e-04 1.272041e-03
Lactococcus 6.092653e-04 2.670389e-03
Dwaynesavagella 5.973489e-04 2.406470e-03
Romboutsia 5.899953e-04 1.971664e-03
Avilachnospira 5.891252e-04 1.336502e-03
Phocaeicola_A 5.877376e-04 1.508366e-03
Gallibacter 5.526548e-04 1.143747e-03
Clostridium_G 5.265080e-04 1.922810e-03
Thomasclavelia 5.243206e-04 7.708176e-04
Catenibacterium 5.232710e-04 1.436336e-03
Slackia_A 5.224363e-04 8.857518e-04
Limosilactobacillus 5.086724e-04 1.882237e-03
Mailhella 5.081724e-04 1.525258e-03
Fimiplasma 4.808492e-04 1.238112e-03
Anaerosphaera 4.681118e-04 1.690270e-03
Bifidobacterium 4.439194e-04 3.270070e-03
Dysosmobacter 4.111732e-04 9.687459e-04
Oliverpabstia 3.836669e-04 5.238535e-04
Paraclostridium 3.811164e-04 1.012270e-03
Parasutterella 3.695571e-04 2.814463e-03
Romboutsia_C 3.675859e-04 2.251512e-03
UBA9414 3.024223e-04 5.724971e-04
Klebsiella 2.929593e-04 1.574562e-03
Amedibacillus 2.597565e-04 1.203178e-03
Terrisporobacter 2.530260e-04 1.078568e-03
MGBC140090 2.530092e-04 6.302546e-04
UMGS1370 2.472629e-04 4.566485e-04
Zhenhengia 2.311744e-04 1.368096e-03
Dielma 2.243685e-04 4.384118e-04
CCUG-7971 2.239356e-04 8.642412e-04
Lawsonibacter 2.229983e-04 6.420821e-04
Pseudoflavonifractor_A 1.988031e-04 3.210845e-04
Cellulosilyticum 1.832394e-04 1.114445e-03
Rhodoblastus 1.815455e-04 1.382609e-03
Hathewaya 1.754062e-04 6.154773e-04
Metalachnospira 1.500057e-04 2.916143e-04
Pseudoscilispira 1.399742e-04 2.488383e-04
Beduini 1.280716e-04 2.691526e-04
Mycoplasmoides 1.276422e-04 6.902477e-04
RGIG7332 1.252571e-04 4.581646e-04
Merdivicinus 1.182461e-04 4.220816e-04
Anaerofilum 1.153185e-04 4.683465e-04
Duodenibacillus 1.034486e-04 2.178367e-04
Helicobacter_D 9.899513e-05 5.412740e-04
JAGZHZ01 8.440962e-05 2.791793e-04
UBA3375 8.372673e-05 2.904414e-04
RGIG3102 7.504570e-05 3.295910e-04
Angelakisella 7.436527e-05 2.555538e-04
Enterococcus 7.352338e-05 3.798820e-04
Clostridium_AH 7.168151e-05 4.955652e-04
Acetatifactor 7.039992e-05 2.468687e-04
Succinivibrio 6.721576e-05 2.247326e-04
Catellicoccus 6.673736e-05 5.082566e-04
Evtepia 5.409691e-05 3.186964e-04
Paenalcaligenes 4.747760e-05 2.888266e-04
Scybalenecus 4.433113e-05 2.541989e-04
UBA866 4.420840e-05 2.517765e-04
Scybalocola 3.695431e-05 1.459453e-04
Merdisoma 2.318332e-05 9.000681e-05
# Genus names ordered from highest to lowest mean relative abundance.
# genus_summary already excludes unassigned genera and has the "g__" prefix
# stripped, so no further name clean-up is needed here. The statistic is an
# actual mean, matching the column name; every genus has one row per sample,
# so the resulting order is the same as ranking by the sum.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(genus)

# Per region
# Jitter plot with one row per genus, coloured by phylum and split into one
# panel per region. Only non-zero relative abundances are drawn.
genus_summary %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # reversed levels put the most abundant genus at the top of the y axis
  mutate(genus = factor(genus, levels = rev(pull(genus_summary_sort, genus)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  scale_color_manual(values = phylum_colors) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ region) +
  theme_minimal() +
  # the y axis shows genera, so it is labelled accordingly
  labs(y = "Genus", x = "Relative abundance", color = "Phylum")